; DOS .COM program (org 100h): segment setup and lookup-table generation.
; NOTE(review): the code relies on the register/stack state at entry
; (sp=0xfffe with a zero word on top of the stack, di=0xfffe, si=0x100,
; ss = program segment). That is the customary .COM start state, but nothing
; in this file establishes it - confirm for the target DOS.
org 100h

  push 0xa000 ; sp=-4; this word is popped into es after the video mode is set

  push 0x8000
  pop ds      ; table: cos
  push 0x7000
  pop fs      ; table: 1/cos
;  push 0x6000
;  pop gs      ; backbuffer
  xor bp,bp   ; bp=0, so [bp+...] operands below are plain ss-relative offsets

;Cos table with 16384 entries
; The word at [ss:bp+di] is used as a 16-bit angle counter. For every counter
; value, cos(angle/2608) is stored at ds:angle*4 and 2/cos(angle/2608) at
; fs:angle*4. angle*4 is truncated to 16 bits, so each segment holds 16384
; dwords and every slot is written four times (the last write wins).
  fninit
COS_TAB:
  imul bx,[bp+di],4 ; bx=angle*4 = byte offset of the slot; angle=[ss:bp+di]=[ss:-2] (0 on init)
  fild word[bp+di]  ; angle is loaded as a signed word
  fidiv word[bp+C16K_DIV_2PI] ; bp base -> ss segment (ds already points at the table)
  fcos           ;; cos(angle/16384*2pi): adjust period to 2pi
  fst dword[bx]  ; ds:bx = cos table entry
  fidivr word[bp+C2] ;; 2/cos(...)
  fstp dword[fs:bx]
  inc word[bp+di] ; next angle (sp=-4)
  jnz COS_TAB     ; until the counter wraps back to 0 (bx=0xfffc after the last pass)


  ; Switch to the graphics mode, then set up bp as the base register for all
  ; variable accesses and load es with the segment pushed at program start.
  mov ax,0x4f02 ; int 10h function 0x4f02 with the mode number in bx
  mov bx,0x117
  int 10h       ; 1024x768 16-bit mode

  lea bp,[bx+si] ; bp=0x117+si, i.e. 0x217 when si=0x100 (matches the -0x217 below)
                 ; NOTE(review): assumes int 10h left bx and si unchanged - confirm
; d(xx)/w(xx): dword/word access to variable xx, encoded bp-relative with a
; one-byte displacement. A bp base defaults to the ss segment, so variables
; stay reachable although ds now points at the cos table.
%define d(xx) dword[byte bp+xx-0x217]
%define w(xx) word[byte bp+xx-0x217]

  pop es        ; es=0xa000 (the word pushed first); stosw writes through es:di

; Frame loop entry: advance time and derive the per-frame coefficients.
;   T     += DELTA_T
;   T2     = ln(2) * T
;   scale  = 1 - cos(T2)/10
;   C      = cos(T)*scale,  S = sin(T)*scale
; Four values are pushed and four popped, so the net x87 stack effect is zero.
M:
  fldln2         ;; 0.6931

  fld d(T)       ;; t 0.6931
  fadd d(DELTA_T)
  fst d(T)       ; t += delta_t

  fmul st1,st0   ;; t t2=0.6931*t
  fsincos        ;; c=cos(t) s=sin(t) t2=0.6931*t

  fld st2
  fcos           ;; cos(t2) c s t2
  fidiv w(C10)   ;; cos(t2)/10 c s t2
  fisubr w(C1)   ;; scale=1-cos(t2)/10 c s t2
  fmul st1,st0   ;; scale C=c*scale s t2
  fmulp st2,st0  ;; C=c*scale S=s*scale t2
  fstp d(C)
  fstp d(S)
  fstp d(T2)

  ; Window loop: dx counts the 64 kB video windows of one frame. The loop tail
  ; (inc dx / cmp dl,768/32 / jb P) runs it for dx = 0..23, i.e. 32 scanlines
  ; of 2048 bytes per window.
  xor dx,dx  ; page: 0..23 | y: 0..768
P:
  mov ax,0x4f05  ; once per window (not per line): set window, assume 64kB granularity
  xor bx,bx      ; bh=0 bl=window=0 dx=page
  int 10h

  xor di,di  ; di=pixel adr (byte offset inside the current window)

             ; x: 0..1024

; dx:di address
; cx loop
; bp vars[]
; si 0x100
; ax,bx scratch
; Pixel loop entry: clear the four accumulators, derive the centred and
; normalised coordinates from the linear byte address dx:di, and store
; D = length(x,y)/2. The word/dword at [bp+si] is used as integer scratch.
; x87 stack on exit: x y K R G B
Q:
  fldz
  fldz
  fldz
  fldz       ; K=0 R=0 G=0 B=0

  mov ax,di
  shrd ax,dx,11    ; ax = (dx:di)>>11 = scanline number (2048 bytes per line)
  sub ax,768/2     ; ax = y[-384..383]
  mov [bp+si],ax
  fild word[bp+si]
  fidiv w(C384) ;; y[-1..1] K R G B

  mov ax,di
  shr ax,1         ; 2 bytes per pixel
  and ax,1023
  sub ax,1024/2    ; ax = x[-512..511]
  mov [bp+si],ax
  fild word[bp+si]
  fidiv w(C384) ;; x[-1.33..1.33] y[-1..1] K R G B

  stc           ; LEN needs CF=1 on entry and returns with CF=1
  call LEN
  fmul d(CHALF)  ;; d=length(x,y)/2 x y K R G B
  fstp d(D)      ;; x y K R G B

  ; Iteration loop (cx counts 16 down to 1): rotate/scale the point, fold it
  ; into a unit cell, evaluate the ring function via the 2/cos table and add a
  ; phase-shifted colour contribution to K, R, G and B.
  ; CF invariant: CF=1 at label I and at label FOLD. Each "cmc / jnc" pair makes
  ; the instructions before it run exactly twice (CF 1->0: jump taken, then
  ; CF 0->1: fall through); the x87 instructions in between do not alter CF.
  ; NOTE(review): only cl is written, so ch must already be 0 - on the first
  ; pixel that depends on cx at program entry; afterwards `loop` leaves cx=0.
  mov cl,16     ; cx = i
I:
; rotate and scale
  ;[x] = [C -S]*[x]
  ;[y]   [S  C] [y]

  ; stack comments: first pass | second pass
  fld st1         ;; y x y K R G B    | x Sy x Cy K R G B
  fmul d(C)       ;; Cy x y K R G B   | Cx Sy x Cy K R G B
  fxch st2        ;; y x Cy K R G B   | x Sy Cx Cy K R G B
  fmul d(S)       ;; Sy x Cy K R G B  | Sx Sy Cx Cy K R G B
  cmc
  jnc I
  faddp st3,st0  ;; Sy Cx Sx+Cy K R G B
  fsubp st1,st0  ;; x=Cx-Sy y=Sx+Cy K R G B

; square fold for now
; Each pass subtracts 0.5 from st0, then removes its rounded integer part and
; swaps st0/st1, so after two passes both coordinates are folded and back in
; x y order.
FOLD:
  fsub d(CHALF) ;; x=x-0.5 y=y-0.5 K R G B
  fist dword[bp+si]
  fisub dword[bp+si]
  fxch st1
  cmc
  jnc FOLD      ;; x=x-round(x) y=y-round(y) K R G B

  ; interfering concentric circles
  call LEN      ; CF=1 here (FOLD just fell through its jnc)
  fimul w(C5)   ;; 5*length(x,y) x y K R G B
  fadd d(D)
  fldl2e
  fmul d(T)
  fsubp st1,st0 ;; 5*length(x,y)+d-1.4427*t x y K R G B

  fimul w(C16K_DIV_2PI) ; radians -> table index
  fistp dword[bp+si]
  imul bx,[bp+si],4     ; index -> byte offset (low word only, wraps at 64 kB)
  fld dword[fs:bx]  ;; k=2/cos(5*length(x,y) + d - 1.4427*t) x y K R G B
  fadd st3,st0      ;; k x y K+=k R G B


  ; RGB += k * ( 0.5 + cos(3*(i/100 - d + t2) + [2 1 0]) );
  ; (the 0.5*k part is applied after the loop as K/2)
  fld d(T2)
  fsub d(D)
  fimul w(C16K_DIV_2PI)  ;; Q=(t2 - d)*16384/2pi k x y K R G B
  fistp dword[bp+si]  ;; k x y K R G B
  imul bx,[bp+si],12 ; bx = (t2 - d) *3*65536/2pi
  imul ax,cx,312     ; 312 bytes = 78 table entries per iteration step
  add bx,ax     ;; bx += (t2 - d + i/100) *3*65536/2pi

  fld dword[bx]  ;; cos(q)   (ds:bx = cos table)
  fmul st1      ;; k*cos(q) k x y K R G B
  faddp st7,st0 ;; k x y K R G B+=k*cos(q)

  fld dword[bx+2607*4] ; 2607 entries further = phase shift of about 1 radian
  fmul st1
  faddp st6,st0 ;; k x y K R G+=k*cos(q+1) B

  fmul dword[bx+2607*4+2607*4] ; about 2 radians; multiplies k in place
  faddp st4,st0 ;; x y K R G B

  stc           ; restore CF=1 for the next pass (imul/add changed the flags)
  loop I

  ; Finalise the pixel: drop x and y, add K/2 to each channel, square each
  ; channel, clamp it to 5 bits and pack the three values into a 16-bit pixel
  ; that is written twice. Then advance the pixel and window loops.
  fcompp        ;; pops x and y (compare result unused) -> K R G B
  fmul d(CHALF) ;; K/=2 R G B
  fadd st1,st0
  fadd st2,st0
  faddp st3,st0 ;; R+=K G+=K B+=K

  ; RGB = RGB*RGB/256;  // square the sum for better contrast
  mov cl,3         ; three channels, popped in the order R, G, B
COL:
  fmul st0
  fistp word[bp+si]
  mov bx,[bp+si]   ; bh = high byte = squared value / 256
  cmp bh,31
  jb NO_CLAMP
  mov bh,31        ; clamp to the 5-bit maximum
NO_CLAMP:
  shl ax,5         ; 3*5 bits here plus 1 below = 16, so old ax bits are shifted out
  add al,bh        ; .rrr|rrgg|gggb|bbbb
  loop COL
  shl ax,1         ; rrrr|rggg|ggbb|bbb.
  sub al,bh        ; rrrr|rggg|gg.b|bbbb   (bh still holds the blue value)

  stosw
  stosw  ; 2x faster

  test di,di       ; di wrapped to 0 -> the 64 kB window is full
  jnz Q

  inc dx           ; next window
  cmp dl,768/32    ; 24 windows of 32 scanlines per frame
  jb P

  ; After each frame: read one byte from port 60h and start the next frame
  ; unless it equals 1; otherwise restore text mode and leave.
  in al,60h ; ESC check
  cmp al,1
  jne M

  mov ax,3 ; text mode
  int 10h
  ; NOTE(review): ret pops the word at the top of the stack as the return
  ; address. Presumably this is the word at [ss:-2] that COS_TAB used as its
  ; counter and left at 0 - confirm the entry-stack assumption.
  ret

; LEN: push the Euclidean length of (st0, st1) onto the x87 stack.
;   In:   st0=x, st1=y, CF=1 (required; both call sites enter with CF=1)
;   Out:  st0=r=sqrt(x*x+y*y), st1=x, st2=y, CF=1
;   Needs two free x87 stack slots while running.
; The cmc/jnc pair runs the fld/fmul twice: CF 1->0 jumps back, CF 0->1 falls
; through. Entered with CF=0 it would square only one operand.
LEN: ;; x y -> r=sqrt(x*x+y*y)  ; requires cf=1 on entry
  fld st1       ;; pass 1: y x y   | pass 2: x y*y x y
  fmul st0      ;; pass 1: y*y x y | pass 2: x*x y*y x y
  cmc
  jnc LEN
  faddp st1,st0 ;; x*x+y*y x y
  fsqrt
  ret


; Integer constants used as fi* (fidiv/fisubr/fimul) operands.
C1    dw 1
C2    dw 2
C5    dw 5
C10   dw 10
C384  dw 384    ; half of the 768-line screen height; divisor for both x and y
C16K_DIV_2PI dw 2608 ; 16384/2pi, i.e. table entries per radian (rounded)

CHALF dd 0.5

DELTA_T dd 0.25 ; added to T once per frame
T       dd 0.0  ; running time

; Uninitialised per-frame / per-pixel variables, accessed through d().
section .bss

C resd 1   ; cos(T)*scale, written once per frame
S resd 1   ; sin(T)*scale, written once per frame
T2 resd 1  ; ln(2)*T, written once per frame
D resd 1   ; length(x,y)/2 of the current pixel
